*** 
*** Code for "The Labor Market Integration of Refugee Migrants in High-Income Countries"
*** Courtney Brell, Christian Dustmann, and Ian Preston
***
*** Analysis of the Household, Income and Labour Dynamics in Australia survey
*** This file should be run in the folder containing the HILDA data
***

********************************************************************************
*** Preliminaries

* Start from an empty session
clear all

* Keep only variables of interest. Names are those left after the wave-letter
* prefix has been stripped; "wave" is the identifier generated inside the loop.
local usefulvars "xwaveid wave ancob anyoa anref hhhqivw hhwte hhwtrp hhwtsc hgsex hgage hgint anmigc aneab anengf edhigh1 edfts edagels edhists helth wscg wscei wsfei esbrd esdtl"

* Import relevant waves (letters d to q) and stack them into one long dataset.
* temp.dta holds the waves read so far; a stale copy from an earlier run is
* removed first so that it cannot be appended by accident.
set maxvar 10000
capture noisily erase "temp.dta"
foreach wavechar in d e f g h i j k l m n o p q {
	* Forward slash as the directory separator: Stata accepts it on Windows,
	* macOS and Unix, whereas a backslash separator only resolves on Windows.
	use "2. STATA 170c (Zip File 1 of 2 - Combined Data Files)/Combined_`wavechar'170c", clear
	* Strip the wave-letter prefix so variable names line up across waves
	rename `wavechar'* *
	gen wave="`wavechar'"
	keep `usefulvars'
	* On the first pass temp.dta does not exist yet; capture lets the loop continue
	capture noisily append using "temp.dta"
	save "temp.dta", replace
}

* Demographics: country of origin (displayed with the QCOUNTRY value label),
* a 0/1 indicator equal to 1 when hgsex is 2, and age
generate countryoforigin = ancob
label values countryoforigin QCOUNTRY
generate female = cond(hgsex == 2, 1, 0)
generate age = hgage

* Years since arrival = interview year minus year of arrival.
* The interview year is read from characters 7-10 of the interview-date string
* and then converted to a numeric variable.
generate yeararrived = anyoa
generate yearofinterview = substr(hhhqivw, 7, 4)
destring yearofinterview, replace
generate yearssincearrive = yearofinterview - yeararrived

* Most common interview year within each wave
bysort wave: egen yearofwave = mode(yearofinterview)

* Identify migrant types
* native:    country of origin coded 1101 (presumably Australia - confirm against the HILDA codebook)
* refugee:   flagged by anref==1 or anmigc==4
* immigrant: any other non-negative country code, excluding those flagged as refugees
gen native = (countryoforigin==1101)
gen refugee = (anref==1)|(anmigc==4)
gen immigrant = (countryoforigin!=1101&countryoforigin>=0)&(refugee!=1)
* Build the categorical type; the refugee flag overrides native when both are set
gen migranttype=0 if native==1
replace migranttype=1 if refugee==1
replace migranttype=2 if immigrant==1
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
label values migranttype Lmigrant
* Natives have no arrival date; place them at zero years since arrival
replace yearssincearrive=0 if migranttype==0
* Drop refugees from this sample
* The variable name is spelled out in full so that the line does not depend on
* variable-name abbreviation being switched on
drop if refugee==1

* Choose our sample: known migrant type, 0-10 years since arrival, ages 20-64
drop if missing(migranttype)
keep if (yearssincearrive>=0&yearssincearrive<=10)
keep if age>=20&age<=64
* Restrict to respondents
keep if hgint==1
* Use survey weights
gen indweight=hhwtrp

********************************************************************************
*** Calculate labor market outcomes

* Employment indicator: 1 when esbrd is 1, 0 when esbrd is 2 or 3, missing otherwise
generate employment = cond(esbrd == 1, 1, cond(inlist(esbrd, 2, 3), 0, .))

* Wages (reported in weekly wage): strictly positive values of wscei only,
* left missing for anyone coded as not employed
generate wage = wscei if wscei > 0 & employment != 0

* Deflate to 2015 prices (OECD CPI, index equals 100 in 2015)
* Interview years that are missing or negative fall back to the modal year of the wave
replace yearofinterview = yearofwave if missing(yearofinterview) | yearofinterview < 0
generate index = .
* One index value per calendar year, listed in order starting from 2004
local cpivalues 75.0 77.1 79.8 81.7 85.2 86.7 89.25006 92.19875 93.82401 96.12259 98.51405 100 101.277 103.2505 105.2
local cpiyear = 2004
foreach cpi of local cpivalues {
	replace index = `cpi' if yearofinterview == `cpiyear'
	local ++cpiyear
}
* Years without an index value leave the deflated wage missing
replace wage = wage * 100 / index

* Indicators for nonmissing outcomes, summed later to give cell sizes
generate Nemp = (employment < .)
generate Ninc = (wage < .)

* Statistics computed for every cell: weighted means of employment and wage,
* plus unweighted counts of nonmissing observations (rawsum ignores the weights)
local cellstats "(mean) employment avg_income=wage (rawsum) Nemp Ninc"

* Cells by sex, years since arrival and migrant type
preserve
	collapse `cellstats' [aw=indweight], by(female yearssincearrive migranttype)
	save "AU-HILDA", replace
restore

* Cells pooled over sex (female is missing in these rows), stacked on top of
* the by-sex cells and written back to the same file
preserve
	collapse `cellstats' [aw=indweight], by(yearssincearrive migranttype)
	append using "AU-HILDA"
	order yearssincearrive migranttype female employment Nemp avg_income Ninc
	sort migranttype female yearssincearrive
	save "AU-HILDA", replace
restore


********************************************************************************
*** Calculate sample descriptives

* Run once for migrant type 0 (natives) and once for type 2 (other immigrants);
* the data in memory are restored at the end of each pass
foreach loopmig of numlist 0 2 {
	preserve
	display `loopmig'
	keep if migranttype == `loopmig'

	* Number of observations
	count
	* Number of unique individuals: persTag is 1 on one row per xwaveid
	egen persTag = tag(xwaveid)
	tabulate persTag

	* Gender
	summarize female [aw=indweight]
	* Age
	summarize age [aw=indweight], detail

	* Time since arrival
	summarize yearssincearrive [aw=indweight], detail

	* Age at arrival
	generate age_at_arrival = age - yearssincearrive
	summarize age_at_arrival [aw=indweight], detail

	* Year of arrival
	generate year_of_arrival = yearofinterview - yearssincearrive
	summarize year_of_arrival [aw=indweight], detail

	* Labor market outcomes: unweighted counts followed by weighted summaries
	count if !missing(employment)
	summarize employment [aw=indweight], detail
	count if !missing(wage)
	summarize wage [aw=indweight], detail

	* Country of origin: weighted share of each country, five largest listed
	capture generate one = 1
	collapse (sum) weightedn = one [iw=indweight], by(countryoforigin)
	egen weightedtotal = total(weightedn)
	generate fracrefugees = weightedn / weightedtotal
	gsort -fracrefugees
	list countryoforigin fracrefugees if _n <= 5
	restore
}

********************************************************************************
*** Clean up

* Delete the scratch file used while stacking the waves; if the delete fails
* the error is displayed but the script carries on. Then empty memory.
cap noi erase "temp.dta"
clear all
